from re import match, S
from pprint import pprint as pp

from scrapy.selector import Selector
from pymysql import connect


class Mysql:

    def __init__(self):
        self.connection = connect(
            host="120.77.183.17",
            user="kxx",
            password="#iK1xl^XAAun*UI#",
            charset="utf8")

    def execute(self, sql_):

        with self.connection.cursor() as cursor:

            try:
                cursor.execute(sql_)
                self.connection.commit()
            except Exception as e:
                self.connection.rollback()
                raise e
            finally:
                self.connection.close()


def response(flow):
    if "https://www.qcc.com/search_adsearchmultilist" in flow.request.url:

        sql_str = """
        INSERT IGNORE INTO offline.`company_shenzhen` (
            NAME,
            TYPE,
            delegate,
            money,
            tel,
            email,
            address,
            url,
            create_time
        )
        VALUES """

        for company in Selector(text=flow.response.text).xpath('//table[@class="m_srchList"]/tbody/tr'):
            # 成立日期
            date = match(".*成立日期：(.*?)<", company.xpath("td[2]/p/span[2]").get())

            # 处理电话号码
            tel = match(".*话：(.*)", company.xpath("td[2]/p[2]/text()").get(), S).group(1).strip()
            tel = '' if tel == '-' else tel

            # 处理电子邮件
            email = match(".*邮箱：(.*)", company.xpath("td[2]/p[2]/span/text()").get(), S).group(1)
            email = '' if email == "-" else email

            # 处理注册资本
            money = match(".*资本：(.*)", company.xpath("td[2]/p/span[1]/text()").get()).group(1).strip()
            money = "" if money == "-" else money

            # 拼接sql
            sql_str += "("
            sql_str += f"'{company.xpath('td[2]/a/text()').get()}',"
            sql_str += "'{}',".format("纺织业")
            sql_str += f"'{company.xpath('td[2]/p[1]/a/text()').get()}',"
            sql_str += f"'{money}',"
            sql_str += f"'{tel}',"
            sql_str += f"'{email}',"
            sql_str += f"'{match('.*地址：(.*?)<', company.xpath('td[2]/p[3]').get(), S).group(1).strip()}',"
            sql_str += f"'{'https://www.qcc.com' + company.xpath('td[2]/a/@href').get()}',"
            sql_str += f"'{date.group(1)}'"
            sql_str += "),"

        # 处理sql
        sql = Mysql()

        sql.execute(sql_str[:-1])
